import logging
import time
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
'''
Scrape the Douban movie chart data (this section is currently commented out)
'''
# url = "https://movie.douban.com/chart"
# heards = {
#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36"
# }
# resp = requests.get(url=url, headers=heards)
# t = resp.text
#
# page = BeautifulSoup(t, "html.parser") #指定html解析器
#
# # div = page.find("div", class_="", id="")
# div = page.find("div", attrs={"class": "", "id": ""})
#
# table = div.findAll("table", width="100%", class_="")
#
# for i in table:
#     print(i.find("a", class_="").text.strip().replace(" ", "").replace("\n",""))
#     print(i.find("span", class_="pl").text)
#     print(i.find("p" ,class_="pl").text)

'''
Scrape images from the "weimei" wallpaper listing and save them to disk
'''

url = "http://www.netbian.com/weimei/"
# NOTE: "heards" is a misspelling of "headers"; the name is kept unchanged so
# anything else referring to it keeps working.
heards = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36"
}

logger = logging.getLogger(__name__)

# Pages are decoded as GBK, exactly as the original script did.
_PAGE_ENCODING = "gbk"
# Seconds to wait for the server; without a timeout requests can block forever.
_REQUEST_TIMEOUT = 10
# The first six target="_blank" anchors of the list are ignored.
# NOTE(review): presumably these are not wallpaper entries - confirm.
_SKIPPED_LINKS = 6
# Only this many listing entries are processed per run.
_MAX_IMAGES = 3
# Pause between image downloads in seconds (0 disables the pause).
_DOWNLOAD_DELAY = 0
# Destination directory, relative to the current working directory.
_OUTPUT_DIR = Path("爬虫/文件")
# Characters that are invalid or dangerous in file names are mapped to "_".
_FORBIDDEN_CHARS = str.maketrans({char: "_" for char in '\\/:*?"<>|\r\n\t'})


def _fetch_soup(page_url):
    """Download *page_url* and return it parsed as a BeautifulSoup tree.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status.
    """
    response = requests.get(url=page_url, headers=heards, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = _PAGE_ENCODING
    return BeautifulSoup(response.text, "html.parser")


def _collect_detail_links(listing_url):
    """Return ``(title, detail_url)`` pairs found on the listing page.

    The title is the anchor text; the URL is the anchor's href resolved
    against *listing_url*. Anchors without an href are ignored.

    Raises:
        RuntimeError: if the page has no ``<div class="list">`` element.
        requests.RequestException: if the listing page cannot be fetched.
    """
    listing = _fetch_soup(listing_url)
    container = listing.find("div", class_="list")
    if container is None:
        raise RuntimeError(f'no <div class="list"> found on {listing_url}')
    anchors = container.find_all("a", target="_blank")[_SKIPPED_LINKS:]
    # urljoin copes with root-relative, relative and absolute hrefs alike.
    return [
        (anchor.text, urljoin(listing_url, anchor["href"]))
        for anchor in anchors
        if anchor.get("href")
    ]


def _find_image_url(title, detail_url):
    """Return the URL of the ``<img>`` whose title attribute equals *title*.

    Returns ``None`` when the detail page has no such image or the image has
    no ``src`` attribute.

    Raises:
        requests.RequestException: if the detail page cannot be fetched.
    """
    detail = _fetch_soup(detail_url)
    image = detail.find("img", title=str(title))
    if image is None or not image.get("src"):
        return None
    return urljoin(detail_url, image["src"])


def _safe_filename(title, fallback):
    """Turn *title* into a usable file name, or *fallback* if nothing is left."""
    cleaned = str(title).translate(_FORBIDDEN_CHARS).strip()
    return cleaned or fallback


def _save_image(image_url, target):
    """Download *image_url* and write its bytes to the path *target*.

    Raises:
        requests.RequestException: if the download fails; nothing is written
            in that case, so an error page never ends up saved as an image.
    """
    response = requests.get(url=image_url, headers=heards, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    target.write_bytes(response.content)


def main():
    """Download the first few wallpapers from the listing page at ``url``.

    The first ``_MAX_IMAGES`` entries of the listing are looked up on their
    detail pages, then each resolved image is saved as
    ``<_OUTPUT_DIR>/<title>.jpg``. Entries that cannot be resolved or
    downloaded are logged and skipped instead of aborting the whole run.
    """
    entries = _collect_detail_links(url)[:_MAX_IMAGES]

    # Titles and image URLs travel together as pairs so that a skipped entry
    # can never shift a title onto the wrong image.
    resolved = []
    for title, detail_url in entries:
        try:
            image_url = _find_image_url(title, detail_url)
        except requests.RequestException as exc:
            logger.warning("could not load detail page %s: %s", detail_url, exc)
            continue
        if image_url is None:
            logger.warning("no image titled %r on %s", title, detail_url)
            continue
        resolved.append((title, image_url))

    if not resolved:
        return

    _OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    for index, (title, image_url) in enumerate(resolved):
        target = _OUTPUT_DIR / (_safe_filename(title, f"image_{index}") + ".jpg")
        try:
            _save_image(image_url, target)
        except requests.RequestException as exc:
            logger.warning("could not download %s: %s", image_url, exc)
        if _DOWNLOAD_DELAY:
            time.sleep(_DOWNLOAD_DELAY)


if __name__ == "__main__":
    main()